# Project directory layout. The root is written once and every other path is
# derived from it with file.path(), so relocating the project means editing a
# single line.
main_dir <- "~/Desktop/af-werx"
setwd(main_dir)
# NOTE(review): the recorded output below shows .../af-werx/data rather than
# the project root; presumably it was captured in a session that was already
# inside data/ -- confirm.
getwd()
## [1] "/Users/datasociety/Desktop/af-werx/data"
data_dir <- file.path(main_dir, "data")
data_dir
## [1] "~/Desktop/af-werx/data"
plot_dir <- file.path(main_dir, "plots")
plot_dir
## [1] "~/Desktop/af-werx/plots"
# The read.csv() call further down uses a bare file name, so the data
# directory has to be the working directory from here on.
setwd(data_dir)
getwd()
## [1] "/Users/datasociety/Desktop/af-werx/data"
# Optional: restore a previously saved workspace (left disabled).
# load("tidyr_tables.RData")
# Keep a local handle on the flights table that ships with nycflights13.
flights <- nycflights13::flights
# Minimal base-graphics example: a single numeric vector is plotted against
# its index.
plot(c(1, 4, 6, 7, 8, 11, 44, 66))
# Read the chemical manufacturing sample data from the working directory.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
CMP <- read.csv(
  "ChemicalManufacturingProcess.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# View(CMP)
# Keep the first four columns plus columns 14 to 16.
column_ids <- c(1:4, 14:16)
column_ids
## [1] 1 2 3 4 14 15 16
CMP_subset <- CMP[, column_ids]
# Inspect the structure (column names, types, first values) of the subset.
str(CMP_subset)
## 'data.frame': 176 obs. of 7 variables:
## $ Yield : num 38 42.4 42 41.4 42.5 ...
## $ BiologicalMaterial01 : num 6.25 8.01 8.01 8.01 7.47 6.12 7.48 6.94 6.94 6.94 ...
## $ BiologicalMaterial02 : num 49.6 61 61 61 63.3 ...
## $ BiologicalMaterial03 : num 57 67.5 67.5 67.5 72.2 ...
## $ ManufacturingProcess01: num NA 0 0 0 10.7 12 11.5 12 12 12 ...
## $ ManufacturingProcess02: num NA 0 0 0 0 0 0 0 0 0 ...
## $ ManufacturingProcess03: num NA NA NA NA NA NA 1.56 1.55 1.56 1.55 ...
# Min / quartiles / median / mean / max of the Yield column.
summary(CMP_subset[["Yield"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 35.25 38.75 39.97 40.18 41.48 46.34
# Default box plot of Yield.
boxplot(CMP_subset[["Yield"]])
# Same box plot with an orange fill and a title.
boxplot(CMP_subset[["Yield"]], main = "Yield Summary", col = "orange")
# Show the first six built-in colour names.
head(colors(), 6)
## [1] "white" "aliceblue" "antiquewhite" "antiquewhite1"
## [5] "antiquewhite2" "antiquewhite3"
# Open the help page for colors() and run the bundled colour demo.
help("colors")
demo("colors")
##
##
## demo(colors)
## ---- ~~~~~~
##
## > ### ----------- Show (almost) all named colors ---------------------
## >
## > ## 1) with traditional 'graphics' package:
## > showCols1 <- function(bg = "gray", cex = 0.75, srt = 30) {
## + m <- ceiling(sqrt(n <- length(cl <- colors())))
## + length(cl) <- m*m; cm <- matrix(cl, m)
## + ##
## + require("graphics")
## + op <- par(mar=rep(0,4), ann=FALSE, bg = bg); on.exit(par(op))
## + plot(1:m,1:m, type="n", axes=FALSE)
## + text(col(cm), rev(row(cm)), cm, col = cl, cex=cex, srt=srt)
## + }
##
## > showCols1()
##
## > ## 2) with 'grid' package:
## > showCols2 <- function(bg = "grey", cex = 0.75, rot = 30) {
## + m <- ceiling(sqrt(n <- length(cl <- colors())))
## + length(cl) <- m*m; cm <- matrix(cl, m)
## + ##
## + require("grid")
## + grid.newpage(); vp <- viewport(w = .92, h = .92)
## + grid.rect(gp=gpar(fill=bg))
## + grid.text(cm, x = col(cm)/m, y = rev(row(cm))/m, rot = rot,
## + vp=vp, gp=gpar(cex = cex, col = cm))
## + }
##
## > showCols2()
## Loading required package: grid
##
## > showCols2(bg = "gray33")
##
## > ###
## >
## > ##' @title Comparing Colors
## > ##' @param col
## > ##' @param nrow
## > ##' @param ncol
## > ##' @param txt.col
## > ##' @return the grid layout, invisibly
## > ##' @author Marius Hofert, originally
## > plotCol <- function(col, nrow=1, ncol=ceiling(length(col) / nrow),
## + txt.col="black") {
## + stopifnot(nrow >= 1, ncol >= 1)
## + if(length(col) > nrow*ncol)
## + warning("some colors will not be shown")
## + require(grid)
## + grid.newpage()
## + gl <- grid.layout(nrow, ncol)
## + pushViewport(viewport(layout=gl))
## + ic <- 1
## + for(i in 1:nrow) {
## + for(j in 1:ncol) {
## + pushViewport(viewport(layout.pos.row=i, layout.pos.col=j))
## + grid.rect(gp= gpar(fill=col[ic]))
## + grid.text(col[ic], gp=gpar(col=txt.col))
## + upViewport()
## + ic <- ic+1
## + }
## + }
## + upViewport()
## + invisible(gl)
## + }
##
## > ## A Chocolate Bar of colors:
## > plotCol(c("#CC8C3C", paste0("chocolate", 2:4),
## + paste0("darkorange", c("",1:2)), paste0("darkgoldenrod", 1:2),
## + "orange", "orange1", "sandybrown", "tan1", "tan2"),
## + nrow=2)
##
## > ##' Find close R colors() to a given color {original by Marius Hofert)
## > ##' using Euclidean norm in (HSV / RGB / ...) color space
## > nearRcolor <- function(rgb, cSpace = c("hsv", "rgb255", "Luv", "Lab"),
## + dist = switch(cSpace, "hsv" = 0.10, "rgb255" = 30,
## + "Luv" = 15, "Lab" = 12))
## + {
## + if(is.character(rgb)) rgb <- col2rgb(rgb)
## + stopifnot(length(rgb <- as.vector(rgb)) == 3)
## + Rcol <- col2rgb(.cc <- colors())
## + uniqC <- !duplicated(t(Rcol)) # gray9 == grey9 (etc)
## + Rcol <- Rcol[, uniqC] ; .cc <- .cc[uniqC]
## + cSpace <- match.arg(cSpace)
## + convRGB2 <- function(Rgb, to)
## + t(convertColor(t(Rgb), from="sRGB", to=to, scale.in=255))
## + ## the transformation, rgb{0..255} --> cSpace :
## + TransF <- switch(cSpace,
## + "rgb255" = identity,
## + "hsv" = rgb2hsv,
## + "Luv" = function(RGB) convRGB2(RGB, "Luv"),
## + "Lab" = function(RGB) convRGB2(RGB, "Lab"))
## + d <- sqrt(colSums((TransF(Rcol) - as.vector(TransF(rgb)))^2))
## + iS <- sort.list(d[near <- d <= dist])# sorted: closest first
## + setNames(.cc[near][iS], format(zapsmall(d[near][iS]), digits=3))
## + }
##
## > nearRcolor(col2rgb("tan2"), "rgb")
## 0.0 21.1 25.8 29.5
## "tan2" "tan1" "sandybrown" "sienna1"
##
## > nearRcolor(col2rgb("tan2"), "hsv")
## 0.0000 0.0410 0.0618 0.0638 0.0667
## "tan2" "sienna2" "coral2" "tomato2" "tan1"
## 0.0766 0.0778 0.0900 0.0912 0.0918
## "coral" "sienna1" "sandybrown" "coral1" "tomato"
##
## > nearRcolor(col2rgb("tan2"), "Luv")
## 0.00 7.42 7.48 12.41 13.69
## "tan2" "tan1" "sandybrown" "orange3" "orange2"
##
## > nearRcolor(col2rgb("tan2"), "Lab")
## 0.00 5.56 8.08 11.31
## "tan2" "tan1" "sandybrown" "peru"
##
## > nearRcolor("#334455")
## 0.0867
## "darkslategray"
##
## > ## Now, consider choosing a color by looking in the
## > ## neighborhood of one you know :
## >
## > plotCol(nearRcolor("deepskyblue", "rgb", dist=50))
##
## > plotCol(nearRcolor("deepskyblue", dist=.1))
##
## > plotCol(nearRcolor("tomato", "rgb", dist= 50), nrow=3)
##
## > plotCol(nearRcolor("tomato", "hsv", dist=.12), nrow=3)
##
## > plotCol(nearRcolor("tomato", "Luv", dist= 25), nrow=3)
##
## > plotCol(nearRcolor("tomato", "Lab", dist= 18), nrow=3)
# Draw one random colour name per column of the subset; the fixed seed makes
# the draw repeatable.
set.seed(2)
n_cols <- ncol(CMP_subset)
col_sample <- sample(colors(), size = n_cols)
col_sample
## [1] "lightgray" "lavenderblush4" "grey12" "grey88"
## [5] "gray51" "ivory4" "grey36"
# One box per column, each filled with its sampled colour.
boxplot(CMP_subset, col = col_sample)
# Compute the histogram of Yield without drawing it; the returned "histogram"
# object (breaks, counts, density, mids, ...) is printed instead.
# FALSE is spelled out because F is a reassignable variable, not a keyword.
hist(CMP_subset$Yield, plot = FALSE)
## $breaks
## [1] 35 36 37 38 39 40 41 42 43 44 45 46 47
##
## $counts
## [1] 1 3 16 31 39 32 21 20 10 2 0 1
##
## $density
## [1] 0.005681818 0.017045455 0.090909091 0.176136364 0.221590909
## [6] 0.181818182 0.119318182 0.113636364 0.056818182 0.011363636
## [11] 0.000000000 0.005681818
##
## $mids
## [1] 35.5 36.5 37.5 38.5 39.5 40.5 41.5 42.5 43.5 44.5 45.5 46.5
##
## $xname
## [1] "CMP_subset$Yield"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
# Histogram of Yield; the three sampled colours are recycled across the bars.
hist(CMP_subset$Yield, col = col_sample[1:3], xlab = "Yield",
     main = "Dist. of Yield")
# par(mfrow = c(rows, cols)) sets the panel layout for the plots that follow.
# par() returns the previous setting, which is kept so it can be put back.
old_par <- par(mfrow = c(1, 2))
hist(CMP_subset$BiologicalMaterial01, col = col_sample[2],
     xlab = "Bio Material 1", main = "Dist. of Bio Material 1")
hist(CMP_subset$BiologicalMaterial02, col = col_sample[3],
     xlab = "Bio Material 2", main = "Dist. of Bio Material 2")
# Three histograms side by side: 1 row, 3 columns.
par(mfrow = c(1, 3))
hist(CMP_subset$BiologicalMaterial01, col = col_sample[2],
     xlab = "Bio Material 1", main = "Dist. of Bio Material 1")
hist(CMP_subset$BiologicalMaterial02, col = col_sample[3],
     xlab = "Bio Material 2", main = "Dist. of Bio Material 2")
hist(CMP_subset$BiologicalMaterial03, col = col_sample[4],
     xlab = "Bio Material 3", main = "Dist. of Bio Material 3")
# Restore the layout that was active before the multi-panel figures;
# otherwise later single-panel base plots (e.g. the scatter plot that
# follows) would be drawn into one cell of the 1 x 3 grid.
par(old_par)
# Scatter plot of column 2 (Bio Material 1) against column 1 (Yield) with a
# title, axis labels, and cross markers (pch = 4) drawn at twice the default
# size in steel blue.
plot(
  x = CMP_subset[[2]],
  y = CMP_subset[[1]],
  main = "Bio. Material 1 vs Yield",
  xlab = "Bio Material 1",
  ylab = "Yield",
  pch = 4,
  cex = 2,
  col = "steelblue"
)
# Scatter-plot matrix of the first four columns for a quick look at the
# pairwise relationships.
pairs(CMP_subset[1:4], col = "steelblue", pch = 19)
# Install (once, left disabled) and attach the corrplot package.
# install.packages("corrplot")
library("corrplot")
## corrplot 0.84 loaded
# Show the package-level help index for corrplot.
library(help = "corrplot")
# Pairwise correlations between Yield and the three biological materials
# (the first four columns of the subset).
CMP_cor <- cor(CMP_subset[1:4])
# View(CMP_cor)
# Correlation plot with each coefficient drawn as a pie.
corrplot(CMP_cor, method = "pie")
# Correlation plot using corrplot's mixed layout.
corrplot.mixed(CMP_cor)
# install.packages("ggplot2")
library(ggplot2)
?ggplot2
# Base plot object: maps Yield to the x axis. It has no layers yet.
ggp1 <- ggplot(CMP_subset, aes(x = Yield))
ggp1
# Add a histogram on the density scale (bin width 0.75, steel-blue outline,
# gray fill) and overlay a semi-transparent density curve.
# NOTE(review): `..density..` is the older notation for computed variables;
# newer ggplot2 releases provide after_stat(density) -- confirm the installed
# version before switching.
ggp1 <- ggp1 +
  geom_histogram(aes(y = ..density..), binwidth = 0.75,
                 color = "steelblue", fill = "gray") +
  geom_density(alpha = .5, color = "gray", fill = "steelblue")
ggp1
# Add a title and subtitle with labs() (title spelling corrected from
# "Distrobution").
ggp1 <- ggp1 + labs(title = "Distribution", subtitle = "Histogram & Density")
ggp1
# Scatter plot of Yield against Bio Material 1, built up one step at a time
# and printed after each step.
ggp2 <- ggplot(CMP_subset, aes(x = BiologicalMaterial01, y = Yield))
ggp2
# Step 1: default point layer.
ggp2 <- ggp2 + geom_point()
ggp2
# Step 2: a second point layer in dark orange (added on top of the default
# points from step 1), a smoother fitted with lm, and title/subtitle.
ggp2 <- ggp2 +
  geom_point(color = "darkorange") +
  geom_smooth(method = lm) +
  labs(
    title = "Bio. Material 1 vs Yield",
    subtitle = "Scatterplot with linear fit"
  )
ggp2
# Reusable theme: black-and-white base with larger title, subtitle and axis
# text.
ggtheme1 <- theme_bw() +
  theme(
    plot.title = element_text(size = 25),
    plot.subtitle = element_text(size = 18),
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 16)
  )
ggp2 <- ggp2 + ggtheme1
ggp2